

*************************************************************************
* 				PART III. fuel share from MECS							*
*************************************************************************

* 0. clean and prepare *************************************************

* 1. import data and keep variables
use "$MECSenergy/MECS.dta", clear
keep if level <= 3 | level == 6 
keep naics year level firstuse_allpurpose_*_2 
*drop level
	
* drop 1 obs in 2014 (other sector)
drop if naics == 98
* drop the total consumption
drop if naics == 99
replace firstuse_allpurpose_elec_2 = 0 if firstuse_allpurpose_elec_2 < 0 & naics == 324199 // fix me, this industry has some problem, elec usge < 0

	
* 2. interpolate and extrapolate to 1990-2016
	* note: I will interpolate and flat extrapolate all the NAICS3 level data
	* I will interpolate NAICS6 level data.
	
	* a. NAICS3 level
preserve

	* (1) fill the panel
	keep if level == 3
	* 1. add the year to 1990-2016
	local new = _N + 1
    set obs `new'
	replace naics = 311 if naics == .
	replace year = 1990 if year == .
	
	local new = _N + 1
    set obs `new'
	replace naics = 311 if naics == .
	replace year = 2016 if year == .
	
	* fulfill
	tsset naics year
	tsfill, full
	
	
	* (2) interpolate
	sort naics year
	foreach v in "elec" "resdfuel" "distfuel" "gas" "hgl" "coal" "coke" "other" {
		by naics: ipolate firstuse_allpurpose_`v'_2 year, gen(firstuse_allpurpose_`v'_int)
		replace firstuse_allpurpose_`v'_int = 0 if firstuse_allpurpose_`v'_int < 0
		
		* A. flat extrapolate
		* 1. after the last obs
		by naics: carryforward firstuse_allpurpose_`v'_int, gen(firstuse_allpurpose_`v'_1)
		drop firstuse_allpurpose_`v'_int
		rename firstuse_allpurpose_`v'_1 firstuse_allpurpose_`v'_int
		
		* 2. before the 1st obs
		gen nonmissing = 0
		replace nonmissing = 1 if !missing(firstuse_allpurpose_`v'_int)
		sort naics nonmissing year
		by naics nonmissing: gen  A = firstuse_allpurpose_`v'_int[1]
		by naics: egen AA = mean(A)
		replace firstuse_allpurpose_`v'_int = AA if firstuse_allpurpose_`v'_int == .
		drop nonmissing A AA
		
		* B. for industries we only have data for 1 year, record it and replace the entire series to that value
		by naics: egen N_`v' = count(firstuse_allpurpose_`v'_2)
		by naics: egen singleton_`v' = min(firstuse_allpurpose_`v'_2)
		replace firstuse_allpurpose_`v'_int = singleton_`v' if N_`v' == 1
		* only 326 Coal has the singleton problem. It only has data in 1998.
		* Fortunately that industry 326's main energy is gas, and coal counts for less than 1% in 1998.
		
		* C. for industries that have no data for certain type of energy : 
		* Great! No NAICS industry has this problem
		* replace firstuse_allpurpose_`v'_int = 0 if firstuse_allpurpose_`v'_int == .
		
	}
	*
	keep naics year *int
	tempfile NAICS3fullpanel
	save `NAICS3fullpanel.dta', replace
restore

	* b. NAICS 6 level
	* (1) fulfill the panel
	* (1.1) clear
	drop if level == 3
	tsset naics year
	
	* extrapolate for observations with missings in some energy types
	foreach v in "tot" "elec" "resdfuel" "distfuel" "gas" "hgl" "coal" "coke" "other" {
		by naics: ipolate firstuse_allpurpose_`v'_2 year, gen(firstuse_allpurpose_`v'_int)
		replace firstuse_allpurpose_`v'_int = 0 if firstuse_allpurpose_`v'_int < 0
		
		* A. flat extrapolate
		* 1. after the last obs
		by naics: carryforward firstuse_allpurpose_`v'_int, gen(firstuse_allpurpose_`v'_1)
		drop firstuse_allpurpose_`v'_int
		rename firstuse_allpurpose_`v'_1 firstuse_allpurpose_`v'_int
		
		* 2. before the 1st obs
		gen nonmissing = 0
		replace nonmissing = 1 if !missing(firstuse_allpurpose_`v'_int)
		sort naics nonmissing year
		by naics nonmissing: gen  A = firstuse_allpurpose_`v'_int[1]
		by naics: egen AA = mean(A)
		replace firstuse_allpurpose_`v'_int = AA if firstuse_allpurpose_`v'_int == .
		drop nonmissing A AA
		
	}
	*
	egen total = rowtotal(firstuse_allpurpose_elec_int firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int firstuse_allpurpose_coke_int firstuse_allpurpose_other_int)
	gen rate = total/firstuse_allpurpose_tot_int
	drop if rate <0.8
	*drop if firstuse_allpurpose_tot_int == . & rate == .
	drop rate total
	drop *_2
	rename *_int *_2
	
	* (1.2) fulfill
	* 1. add the year to 1990-2016
	local new = _N + 1
    set obs `new'
	replace naics = 311221 if naics == .
	replace year = 1990 if year == .
	
	local new = _N + 1
    set obs `new'
	replace naics = 311221 if naics == .
	replace year = 2016 if year == .
	
	sort naics year
	tsfill, full
	
	* (2) interpolate
	foreach v in "tot" "elec" "resdfuel" "distfuel" "gas" "hgl" "coal" "coke" "other" {
		by naics: ipolate firstuse_allpurpose_`v'_2 year, gen(firstuse_allpurpose_`v'_int) 
		replace firstuse_allpurpose_`v'_int = 0 if firstuse_allpurpose_`v'_int < 0
		
		* A. flat extrapolate
		* 1. after the last obs
		by naics: carryforward firstuse_allpurpose_`v'_int, gen(firstuse_allpurpose_`v'_1)
		drop firstuse_allpurpose_`v'_int
		rename firstuse_allpurpose_`v'_1 firstuse_allpurpose_`v'_int
		
		* 2. before the 1st obs
		gen nonmissing = 0
		replace nonmissing = 1 if !missing(firstuse_allpurpose_`v'_int)
		sort naics nonmissing year
		by naics nonmissing: gen  A = firstuse_allpurpose_`v'_int[1]
		by naics: egen AA = mean(A)
		replace firstuse_allpurpose_`v'_int = AA if firstuse_allpurpose_`v'_int == .
		drop nonmissing A AA
		
		* B. for industries with no data in certain fuel type
		replace firstuse_allpurpose_`v'_int = 0 if firstuse_allpurpose_`v'_int == .
	}
	*
	keep naics year *int
	egen missings = rowmiss(firstuse_allpurpose_elec_int firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int firstuse_allpurpose_coke_int firstuse_allpurpose_other_int)
	* the industries with 1 missing catogery are OK. The 2 industries with 2 missing catogeries 325180 are also OK.
	drop if naics == 324122
	* the value of this industry is too far away from values of other 324*** industries.
	egen total = rowtotal(firstuse_allpurpose_elec_int firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int firstuse_allpurpose_coke_int firstuse_allpurpose_other_int)
	gen rate = total/firstuse_allpurpose_tot_int
	sum rate
	drop rate total missings
	
	* (3). merge-in 3 digit level
	append using `NAICS3fullpanel.dta'
	drop firstuse_allpurpose_tot_int
	
* I. time-variant share *************************************************
preserve

	egen total_usage = rowtotal(firstuse_allpurpose_elec_int firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int ///
			firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int ///
			firstuse_allpurpose_coke_int firstuse_allpurpose_other_int)
	
	gen share_RF = firstuse_allpurpose_resdfuel_int/total_usage
	gen share_DF = firstuse_allpurpose_distfuel_int/total_usage
	gen share_NG = firstuse_allpurpose_gas_int/total_usage
	gen share_HL = firstuse_allpurpose_hgl_int/total_usage
	gen share_CL = firstuse_allpurpose_coal_int/total_usage
	gen share_PC = firstuse_allpurpose_coke_int/total_usage
	gen share_PE = firstuse_allpurpose_other_int/total_usage

	gen share_elec = firstuse_allpurpose_elec_int / total_usage
	
	rename naics naics_mecs
	keep share* year naics_mecs
	* save
	tempfile MECS_share
	save `MECS_share.dta', replace
restore

* II. time-invariant share *************************************************
preserve
	/*
	keep if year == 1998 | year == 2002 | year == 2006 | year == 2010 | year == 2014
	rename naics naics_mecs
	
	keep naics_mecs year firstuse_allpurpose_elec_int  firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int ///
			firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int ///
			firstuse_allpurpose_coke_int firstuse_allpurpose_other_int
	rename *_int *_2
	* collapse across year
	collapse (mean) firstuse_allpurpose_elec_2  firstuse_allpurpose_resdfuel_2 firstuse_allpurpose_distfuel_2 ///
			firstuse_allpurpose_gas_2 firstuse_allpurpose_hgl_2 firstuse_allpurpose_coal_2 ///
			firstuse_allpurpose_coke_2 firstuse_allpurpose_other_2 ///
			, by(naics_mecs)
			
	* for industries that have no data for certain type of energy 
	foreach v in "elec" "resdfuel" "distfuel" "gas" "hgl" "coal" "coke" "other" {
		replace firstuse_allpurpose_`v'_2 = 0 if firstuse_allpurpose_`v'_2 == .
	}
	*/
	keep if year == 2007
	rename naics naics_mecs
	
	keep naics_mecs year firstuse_allpurpose_elec_int  firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int ///
			firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int ///
			firstuse_allpurpose_coke_int firstuse_allpurpose_other_int
	rename *_int *_2
	
	* collapse across year
	drop year
	
	egen total_usage = rowtotal(firstuse_allpurpose_elec_2 firstuse_allpurpose_resdfuel_2 firstuse_allpurpose_distfuel_2 ///
			firstuse_allpurpose_gas_2 firstuse_allpurpose_hgl_2 firstuse_allpurpose_coal_2 ///
			firstuse_allpurpose_coke_2 firstuse_allpurpose_other_2)
	
	gen share_RF_inv = firstuse_allpurpose_resdfuel_2/total_usage
	gen share_DF_inv = firstuse_allpurpose_distfuel_2/total_usage
	gen share_NG_inv = firstuse_allpurpose_gas_2/total_usage
	gen share_HL_inv = firstuse_allpurpose_hgl_2/total_usage
	gen share_CL_inv = firstuse_allpurpose_coal_2/total_usage
	gen share_PC_inv = firstuse_allpurpose_coke_2/total_usage
	gen share_PE_inv = firstuse_allpurpose_other_2/total_usage

	gen share_elec_inv = firstuse_allpurpose_elec_2 / total_usage
	
	keep share* naics_mecs
	
	* save
	tempfile MECS_share_inv
	save `MECS_share_inv.dta', replace
restore

* II', use 2010 fixed share *************************************************
preserve

	keep if year == 2010
	rename naics naics_mecs
	
	keep naics_mecs year firstuse_allpurpose_elec_int  firstuse_allpurpose_resdfuel_int firstuse_allpurpose_distfuel_int ///
			firstuse_allpurpose_gas_int firstuse_allpurpose_hgl_int firstuse_allpurpose_coal_int ///
			firstuse_allpurpose_coke_int firstuse_allpurpose_other_int
	rename *_int *_2
	
	* collapse across year
	drop year
	
	egen total_usage = rowtotal(firstuse_allpurpose_elec_2 firstuse_allpurpose_resdfuel_2 firstuse_allpurpose_distfuel_2 ///
			firstuse_allpurpose_gas_2 firstuse_allpurpose_hgl_2 firstuse_allpurpose_coal_2 ///
			firstuse_allpurpose_coke_2 firstuse_allpurpose_other_2)
	
	gen share_RF_inv = firstuse_allpurpose_resdfuel_2/total_usage
	gen share_DF_inv = firstuse_allpurpose_distfuel_2/total_usage
	gen share_NG_inv = firstuse_allpurpose_gas_2/total_usage
	gen share_HL_inv = firstuse_allpurpose_hgl_2/total_usage
	gen share_CL_inv = firstuse_allpurpose_coal_2/total_usage
	gen share_PC_inv = firstuse_allpurpose_coke_2/total_usage
	gen share_PE_inv = firstuse_allpurpose_other_2/total_usage

	gen share_elec_inv = firstuse_allpurpose_elec_2 / total_usage
	
	keep share* naics_mecs
	rename share*inv share*2010
	* save
	tempfile MECS_share_2010
	save `MECS_share_2010.dta', replace
restore


**************************************************************************************************
* 3. merge in time-variant and time-invariant share
**************************************************************************************************
use `MECS_share.dta', clear
merge m:1 naics_mecs using `MECS_share_inv.dta'
drop _merge
	
merge m:1 naics_mecs using `MECS_share_2010.dta'
drop _merge
	

